rm(list = ls())

## SOEP analysis code 
## Note that this code can only be run using SOEP data in Berlin
## The code from line 209 produces the relevant plots and one table in the paper

## Get packages and function 

library(haven)
library(tidyverse)
library(lfe)
library(broom)

## Auxilliary function to tidy felm outout

tidy_felm <- function(model, add_glance = T,
                      add_dv_stats = T,
                      add_conf_90 = T) {
  
  n <- sum(!is.na(model$residuals))
  m_tidy <- broom::tidy(model, conf.int = T)
  
  ## Combine and return
  
  out <- m_tidy %>% 
    mutate(n = n)
  
  ## 90% CI
  
  if (add_conf_90) {
    
    out <- out %>% 
      mutate(conf.low90 = estimate - qnorm(0.95) * std.error,
             conf.high90 = estimate + qnorm(0.95) * std.error)
    
  }
  
  ## Add Mean, SD, Min, Max of DV
  
  dv <- model$formula %>% str_split(' ~ ', simplify = T) %>% .[2]
  dv_mean <- model %>% augment() %>% pull(!!dv) %>% mean(na.rm = T)
  dv_sd <- model %>% augment() %>% pull(!!dv) %>% sd(na.rm = T)
  dv_min <- model %>% augment() %>% pull(!!dv) %>% min(na.rm = T)
  dv_max <- model %>% augment() %>% pull(!!dv) %>% max(na.rm = T)
  
  ## Add DV stats to output
  
  if (add_dv_stats) {
    
    out <- out %>% 
      mutate(dv_mean = dv_mean,
             dv_sd = dv_sd,
             dv_min = dv_min,
             dv_max = dv_max)
    
  }
  
  
  
  ## Add model stats
  
  if (add_glance) {
    g <- model %>% glance() %>% 
      dplyr::rename(f_stat = statistic,
                    f_pval = p.value) %>% 
      dplyr::select(matches('squared|f_'))
    
    out <- out %>% 
      mutate(rsq = g$r.squared,
             a_rsq = g$adj.r.squared,
             f_stat = g$f_stat,
             f_pval = g$f_pval)
    
    ## return
    
    out
  } else {
    out
  }
}


## Get SOEP data

so <- haven::read_dta("soep_small.dta") %>% 
  dplyr::select(pid, syear, sex, gebjahr, hid, 
                plj0433, plj0434, plj0435, 
                plj0436, plj0437, plj0438, plj0439, plj0046,
                pgisced11, plb0022_h, migback)

## Merge KKZ (county codes)
## This file is only accessible in the DIW in Berlin

kkz <- haven::read_dta('regionl.dta') %>%
  dplyr::select(hid, syear, kkz) %>%
  filter(syear >= 2010) %>%
  mutate(kkz = ifelse(nchar(kkz) == 4, paste('0', kkz, sep = ''), kkz),
         kkz = ifelse(kkz < 0, NA, kkz),
         kkz = as.character(kkz)) 

## Merge to main file

so <- so %>%
  left_join(kkz)

## Remove missing county IDs

so <- so %>%
  filter(!is.na(kkz)) 

## Dealing with outcome, missings etc

so <- so %>% 
  mutate(across(c(plj0433, plj0434, plj0435, 
                  plj0436, plj0437, plj0438, plj0439,
                  plj0046, syear, gebjahr, pgisced11,
                  migback, plb0022_h),
                as.numeric)) %>%                       ## To numeric
  mutate(across(c(plj0433, plj0434, plj0435, 
                  plj0436, plj0437, plj0438, plj0439,
                  plj0046, syear, gebjahr, pgisced11,
                  migback, plb0022_h),                 
                ~ifelse(.<0, NA, .))) %>%              ## <0 to missing
  mutate(age = syear - gebjahr)                        ## age


## Get treated by county file 

tbc <- readRDS('treated_by_county.rds') %>%
  dplyr::select(-county) %>% 
  dplyr::rename(county = county.id) %>% 
  dplyr::select(county, AA.district.id, treated) %>%
  mutate(county = as.character(county))

## Merge to main file, declare treated and treated * post 
## Keep only 2016 and 2018

so <- so %>% 
  left_join(tbc, by = c('kkz' = 'county'))%>% 
  mutate(state = substr(kkz, 1, 2)) %>%
  mutate(treated = ifelse(state == 11, 1, treated)) %>% ## code Berlin 
  mutate(treated = ifelse(kkz == '03152', 1, treated)) %>% ## code Goettingen 
  mutate(treated = ifelse(kkz == '03156', 1, treated)) %>% ## code Osterode am Harz 
  mutate(post = ifelse(syear > 2016, 1, 0),
         treated_post = treated*post) %>% 
  filter(syear %in% c(2016, 2018))

## Only do this for NRW/BY

so <- so %>% 
  filter(state %in% c("05", "09"))

## Def outcomes

outcomevars <- c("plj0433", "plj0434", "plj0435", 
                 "plj0436", "plj0437", "plj0438", "plj0439", "plj0046")

## Estimate DiD as loop
## Loop over outcomes, estimate two period DiD for each of these

soep <- lapply(outcomevars, function(x){
  
  ## Print outcome

      cat(x, '\n')

      ## Formula
      
      did <- as.formula(paste0(x, ' ~ treated + treated_post + post', 
                               '| 0 | 0 | AA.district.id'))
      
      ## Model: Full Sample 
      
      m1 <- felm(did, data = so) 
      
      ## Model: Age high low
      
      m2 <- felm(did, data = subset(so, age < 26)) ## changed this to align with wage analyses 
      m3 <- felm(did, data = subset(so, age > 65)) 
      
      ## Model: Employment status
      
      m4 <- felm(did, data = subset(so, plb0022_h == 9)) 
      m5 <- felm(did, data = subset(so, plb0022_h == 1)) 
      m5b <- felm(did, data = subset(so, plb0022_h == 2)) 
      
      mlist <- list(m1, m2, m3, m4, m5, m5b)
      
      ## Clean Up 
      
      mlist <- lapply(mlist, tidy_felm) %>% 
        reduce(rbind) %>% 
        filter(str_detect(term, 'treated_post')) %>%
        mutate(outcome = x,
               ss = c('full',
                      'young < 26',
                      'old > 65',
                      'unemployed',
                      'full-time employed',
                      'part-time employed'))
      
      ## Return
      
      return(mlist)
}) %>% reduce(rbind)

## ## ## Plots from here ## ## ## 

soep <- read_rds("data/soep_results.rds")

## Recode

p_df_new2states <- soep %>% 
  filter(outcome == 'plj0046') %>% 
  filter(ss %in% c('full', 'full-time employed', 'unemployed',
                   'part-time employed', 'young < 26')) %>% 
  mutate(ss = dplyr::recode(ss, 
                            `full` = 'All respondents',
                            `part-time employed` = 'Part-time employed',
                            `full-time employed` = 'Full-time employed',
                            `unemployed` = 'Unemployed',
                            `young < 26` = '18-25 years old')) %>% 
  mutate(ss = paste0(ss, '\n(N =', format(n, big.mark = ','), ')')) %>%
  mutate(ss = factor(ss, levels = unique(ss)[c(2,5,4,3,1)])) %>% 
  mutate(across(c(estimate, conf.low, conf.high),
                ~-.))

## ## 

p_df_new2states %>% 
  dplyr::select(ss, estimate, p.value, n)

## Standardize results 

p_df_new2states <- p_df_new2states %>% 
  mutate(across(c(estimate, conf.low, conf.high),
                ~./dv_sd))

#### Figure 5: Effect of Policy on Attitudes towards Immigration####

p1 <- p_df_new2states %>% 
  filter(!str_detect(ss, '18-25 years old')) %>% 
  ggplot(aes(x = ss, y = estimate)) +
  geom_hline(yintercept = 0, linetype = 'dotted', color = 'grey40') +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) +
  geom_point(shape = 21, fill = 'white', size = 2) +
  theme_bw() +
  ylab('Effect on worries about\nimmigration to Germany\n(standard deviations)') +
  theme(axis.title.y = element_blank()) +
  coord_flip()
p1

#### Table A.13: Details on the panel survey results #### 

l_out <- p_df_new2states %>% 
  filter(term == 'treated_post') %>% 
  mutate(outcomes = "Worries about immigration") %>% 
  dplyr::select(outcomes, ss, estimate, 
                std.error, p.value, n, rsq) %>% 
  mutate(across(3:7, ~round(., 3))) %>% 
  mutate(n = base::format(n,big.mark = ',')) %>% 
  slice(1, 3:5, 2)

## Table

library(kableExtra)

l_out %>%
  kableExtra::kable(., format = 'latex', 
                    booktabs = T,
                    linesep = "",
                    caption = 'Details on the panel survey results',
                    col.names = c('Outcome', 'Sample',
                                  'Estimate', 'SE', 'p-value', 'N',
                                  'R-squared')) %>%
  kable_styling(latex_options = c("hold_position", 'scale_down'), 
                position = "center") 

## Rename other outcomes

olist <- unique(soep$outcome)
onames <- c("1 good for economy", "2 good for culture", "3 good for life", "4 refugees risky (reversed)",
            "5 refugees risky (reversed, v2)", "6 donate for refugees", "7 donate for refugees (v2)",
            "8 worried about immigration (reversed)")
dict_df <- data.frame(outcome = olist, name = onames)

## Transform prior to plotting  

p_df_new2states <- soep %>% 
  filter(ss %in% c('full', 'full-time employed', 'unemployed')) %>% 
  mutate(ss = dplyr::recode(ss, 
                            `full` = 'All respondents',
                            `part-time employed` = 'Part-time',
                            `full-time employed` = 'Respondents in\nfull-time employment',
                            `unemployed` = 'Unemployed\nrespondents')) %>%
  left_join(dict_df)

## Plot

plot_df <- p_df_new2states %>% 
  filter(substr(name, 1, 1) %in% as.character(c(1, 2, 3))) %>% 
  filter(ss == 'All respondents') %>% 
  mutate(name_new = case_when(str_detect(name, '1') ~ 'Refugees good for\nGerman economy',
                              str_detect(name, '2') ~ 'Refugees good for\ncultural life in Germany',
                              str_detect(name, '3') ~ 'Refugees good for\nGermany as a place to live')) %>% 
  mutate(name_new = paste0(name_new, '\n(N = ', format(n, big.mark = ','), ')')) %>% 
  mutate(name_new = fct_reorder(name_new, name, .desc = T))

#### Figure A.14: Effect of Policy on Attitudes towards Refugees ####

p1 <- plot_df %>%  
  ggplot(aes(x = name_new, y = estimate)) +
  geom_hline(yintercept = 0, linetype = 'dotted', color = 'grey40') +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) +
  geom_point(shape = 21, fill = 'white', size = 2) +
  theme_bw() +
  ylab('Effect of Liberalization\n(standard deviations)') +
  theme(axis.title.y = element_blank()) +
  coord_flip()
p1

